Flowchart

library(GENIE3)
library(doParallel)
library(igraph)
library(tidyverse)
library(DT)
library(reticulate)
library(learn2count)
library(rbenchmark)
library(reshape2)
library(gridExtra)
library(DiagrammeR)
library(pROC)
library(JRF)
library(DiagrammeRsvg)
library(rsvg)
library(RColorBrewer)
library(rbenchmark)
library(ZILGM)

use_python("/usr/bin/python3", required = TRUE)
arboreto <- import("arboreto.algo")
pandas <- import("pandas")
numpy <- import("numpy")

execution_times <- list()
source("generate_adjacency.R")
source("symmetrize.R")
source("pscores.R")
source("plotg.R")
source("compare_consensus.R")
source("create_consensus.R")
source("earlyj.R")
source("plotROC.R")
source("cutoff_adjacency.R")
source("infer_networks.R")
grViz_output <- DiagrammeR::grViz("
digraph biological_workflow {
  # Set up the graph attributes
  graph [layout = dot, rankdir = TB]

  # Define consistent node styles
  node [shape = rectangle, style = filled, color = lightblue, fontsize = 12]

  # Define nodes for each step
  StartNode [label = 'Ground Thruth - String Regulatory Network', shape = oval, color = seagreen, fontcolor = black]
  AdjacencyMatrix [label = 'Thruth Adjacency Matrix', shape = rectangle, color = seagreen]
  SimulateData [label = 'Simulate Single-Cell Data', shape = rectangle, color = goldenrod]

  # Reconstruction using Three Packages
  LateIntegration [label = 'Late\nIntegration', shape = oval, color = khaki]
  EarlyIntegration [label = 'Early\nIntegration', shape = oval, color = khaki]
  Jointanalysis [label = 'Joint\nanalysis', shape = oval, color = khaki]
  

  # Process 
  earlyj [label = 'earlyj.R', shape=diamond, color=lightblue, fontcolor=black]
  networkinference [label = 'infer_networks.R\nGENIE3\nGRNBoost2\nZILGM\nJRF', shape = rectangle, color = goldenrod, fontcolor=black]
  symmetrize [label = 'symmetrize.R', shape = rectangle, color = goldenrod, fontcolor=black]
  plotROC [label = 'plotROC.R', shape=diamond, color=lightblue, fontcolor=black]
  generateadjacency [label='generate_adjacency.R\nWeighted Adjacency', shape=rectangle, color=goldenrod, fontcolor=black]
  cutoffadjacency [label='cutoff_adjacency.R\nBinary Adjacency', shape=rectangle, color=goldenrod, fontcolor=black]
  pscores [label='pscores.R\nTPR\nFPR\nF1\nAccuracy\nPrecision', shape=diamond, color=lightblue, fontcolor=black]
  voting [label='Voting\nUnion\nIntersection', shape=diamond, color=lightblue, fontcolor=black]
  plotgcompare  [label='plotg.R\ncompare_consesus.R\nPlot Graphs', shape=rectangle, color=goldenrod, fontcolor=black]

  # Define the workflow structure
  StartNode -> AdjacencyMatrix
  AdjacencyMatrix -> SimulateData
  SimulateData -> LateIntegration
  SimulateData -> EarlyIntegration
  SimulateData -> Jointanalysis
  EarlyIntegration -> earlyj
  earlyj -> networkinference
  LateIntegration -> networkinference
  Jointanalysis -> networkinference
  networkinference -> symmetrize
  symmetrize -> plotROC
  symmetrize -> generateadjacency
  generateadjacency -> cutoffadjacency
  cutoffadjacency -> pscores
  cutoffadjacency -> voting
  voting -> plotgcompare
  voting -> pscores
}
")

svg_code <- export_svg(grViz_output)
rsvg::rsvg_png(charToRaw(svg_code), "./../analysis/flowchart.png")

grViz_output

Tcell Ground Truth

adjm <- read.table("./../data/adjacency_matrix.csv", header = T, row.names = 1, sep = ",") %>% as.matrix()
diag(adjm) <- 0

adjm %>%
    datatable(extensions = 'Buttons',
            options = list(
              dom = 'Bfrtip',
              buttons = c('csv', 'excel'),
              scrollX = TRUE,
              pageLength = 10), 
            caption = "Ground Truth")
gtruth <- igraph::graph_from_adjacency_matrix(adjm, mode = "undirected", diag = F)

num_nodes <- vcount(gtruth)
num_edges <- ecount(gtruth)

set.seed(1234)
plot(gtruth, 
     main = paste("Ground Truth\nNodes:", num_nodes, "Edges:", num_edges),
     vertex.label.color = "black",
     vertex.size = 6, 
     edge.width = 2, 
     vertex.label = NA,
     vertex.color = "steelblue",
     layout = igraph::layout_with_fr)

Simulate Data

ncell <- 500
nodes <- nrow(adjm)

set.seed(1130)
mu_values <- c(3, 6, 9)

count_matrices <- lapply(1:3, function(i) {
  set.seed(1130 + i)
  mu_i <- mu_values[i]
  
  count_matrix_i <- simdata(n = ncell, p = nodes, B = adjm, family = "ZINB", 
                            mu = mu_i, mu_noise = 1, theta = 0.5, pi = 0.2)
  
  count_matrix_df <- as.data.frame(count_matrix_i)
  colnames(count_matrix_df) <- colnames(adjm)
  rownames(count_matrix_df) <- paste("cell", 1:nrow(count_matrix_df), sep = "")
  
  return(count_matrix_df)
})

count_matrices[[1]] %>%
    datatable(extensions = 'Buttons',
            options = list(
              dom = 'Bfrtip',
              buttons = c('csv', 'excel'),
              scrollX = TRUE,
              pageLength = 10), 
            caption = "Simulated count matrix")
saveRDS(count_matrices, "./../analysis/count_matrices.RDS")

Matrices Integration

Late Integration

GENIE3

set.seed(1234)
tictoc::tic("GENIE3 late")
genie3_late <- infer_networks(count_matrices, method="GENIE3")
saveRDS(genie3_late, "./../analysis/genie3_late.RDS")
execution_times[['GENIE3 late']] <- tictoc::toc(log = TRUE)$toc[[1]]
## GENIE3 late: 125.242 sec elapsed
genie3_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 output")

symmetrize Output and ROC

genie3_late_wadj <- generate_adjacency(genie3_late, ground.truth = adjm)
sgenie3_late_wadj <- symmetrize(genie3_late_wadj, weight_function = "mean")
plotROC(sgenie3_late_wadj, adjm, plot_title = "ROC curve - GENIE3 Late Integration")

sgenie3_late_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 symmetrize output")

Generate Adjacency and Apply Cutoff

sgenie3_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sgenie3_late_wadj, 
                 ground.truth = adjm,
                 n = 2,
                 method = "GENIE3")
## Matrix 1 Mean 95th Percentile Cutoff: 0.009957001 
## Matrix 2 Mean 95th Percentile Cutoff: 0.00990153 
## Matrix 3 Mean 95th Percentile Cutoff: 0.00968373
sgenie3_late_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgenie3_late_adj)

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgenie3_late_adj)

consesusm <- create_consensus(sgenie3_late_adj, method="vote")
consesusu <- create_consensus(sgenie3_late_adj, method="union")

scores <- pscores(adjm, list(consesusm))

scoresu <- pscores(adjm, list(consesusu))

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
scoresu$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
ajm_compared <- compare_consensus(consesusm, adjm)

ajm_compared <- compare_consensus(consesusu, adjm)

GRNBoost2

set.seed(1234)
tictoc::tic("GRNBoost2 late")
grnb_late <- infer_networks(count_matrices, method="GRNBoost2")
saveRDS(grnb_late, "./../analysis/grnb_late.RDS")
execution_times[['GRNBoost2 late']] <- tictoc::toc(log = TRUE)$toc[[1]]
## GRNBoost2 late: 8.704 sec elapsed
grnb_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 output")

symmetrize Output and ROC

grnb_late_wadj <- generate_adjacency(grnb_late, ground.truth = adjm)
sgrnb_late_wadj <- symmetrize(grnb_late_wadj, weight_function = "mean")
plotROC(sgrnb_late_wadj, adjm, plot_title = "ROC curve - GRNBoost2 Late Integration")

sgrnb_late_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 symmetrize output")

Generate Adjacency and Apply Cutoff

sgrnb_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sgrnb_late_wadj, 
                 ground.truth = adjm,
                 n = 2,
                 method = "GRNBoost2")
## Matrix 1 Mean 95th Percentile Cutoff: 0.861415 
## Matrix 2 Mean 95th Percentile Cutoff: 0.8786935 
## Matrix 3 Mean 95th Percentile Cutoff: 0.8691662
sgrnb_late_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgrnb_late_adj)

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgrnb_late_adj)

consesusm <- create_consensus(sgrnb_late_adj, method="vote")
consesusu <- create_consensus(sgrnb_late_adj, method="union")

scores <- pscores(adjm, list(consesusm))

scoresu <- pscores(adjm, list(consesusu))

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
scoresu$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
ajm_compared <- compare_consensus(consesusm, adjm)

ajm_compared <- compare_consensus(consesusu, adjm)

ZILGM Park

set.seed(123)
tictoc::tic("ZILGM late")
est_graphs <- list()

for (i in seq_along(count_matrices)) {
  lambda_max <- find_lammax(as.matrix(count_matrices[[i]]))
  lambda_min <- 1e-4 * lambda_max
  lambs <- exp(seq(log(lambda_max), log(lambda_min), length.out = 50))
  
  nb2_fit <- zilgm(X = as.matrix(count_matrices[[i]]), lambda = lambs, family = "NBII",
                   update_type = "IRLS", do_boot = TRUE, boot_num = 10, sym = "OR", nCores = 15)
  
  est_graphs[[i]] <- nb2_fit$network[[nb2_fit$opt_index]]
}
## learning for NBII graphical model 
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress :  10 % 
Conducting sampling in progress :  20 % 
Conducting sampling in progress :  30 % 
Conducting sampling in progress :  40 % 
Conducting sampling in progress :  50 % 
Conducting sampling in progress :  60 % 
Conducting sampling in progress :  70 % 
Conducting sampling in progress :  80 % 
Conducting sampling in progress :  90 % 
Conducting sampling in progress :  100 % 
learning for NBII graphical model 
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress :  10 % 
Conducting sampling in progress :  20 % 
Conducting sampling in progress :  30 % 
Conducting sampling in progress :  40 % 
Conducting sampling in progress :  50 % 
Conducting sampling in progress :  60 % 
Conducting sampling in progress :  70 % 
Conducting sampling in progress :  80 % 
Conducting sampling in progress :  90 % 
Conducting sampling in progress :  100 % 
learning for NBII graphical model 
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress :  10 % 
Conducting sampling in progress :  20 % 
Conducting sampling in progress :  30 % 
Conducting sampling in progress :  40 % 
Conducting sampling in progress :  50 % 
Conducting sampling in progress :  60 % 
Conducting sampling in progress :  70 % 
Conducting sampling in progress :  80 % 
Conducting sampling in progress :  90 % 
Conducting sampling in progress :  100 % 
execution_times[['ZILGM late']] <- tictoc::toc(log = TRUE)$toc[[1]]
## ZILGM late: 3888.914 sec elapsed

Comparison with the Ground Truth

est_graphs <- list(as.matrix(est_graphs[[1]]), as.matrix(est_graphs[[2]]), as.matrix(est_graphs[[3]]))

rownames(est_graphs[[1]]) <- rownames(adjm)
colnames(est_graphs[[1]]) <- colnames(adjm)
rownames(est_graphs[[3]]) <- rownames(adjm)
colnames(est_graphs[[3]]) <- colnames(adjm)
rownames(est_graphs[[2]]) <- rownames(adjm)
colnames(est_graphs[[2]]) <- colnames(adjm)
scores <- pscores(adjm, est_graphs)

scoresu <- pscores(adjm, est_graphs)

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
scoresu$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(est_graphs)

consesusm <- create_consensus(est_graphs, method="vote")
consesusu <- create_consensus(est_graphs, method="union")

scores <- pscores(adjm, list(consesusm))

scoresu <- pscores(adjm, list(consesusu))

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
scoresu$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
ajm_compared <- compare_consensus(consesusm, adjm)

ajm_compared <- compare_consensus(consesusu, adjm)

Early Integration

early_matrix <- list(earlyj(count_matrices))

GENIE3

set.seed(1234)
tictoc::tic("GENIE3 early")
genie3_early <- infer_networks(early_matrix, method="GENIE3")
execution_times[['GENIE3 early']] <- tictoc::toc(log = TRUE)$toc[[1]]
## GENIE3 early: 151.435 sec elapsed
saveRDS(genie3_early, "./../analysis/genie3_early.RDS")

genie3_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 output")

symmetrize Output and ROC

genie3_early_wadj <- generate_adjacency(genie3_early, ground.truth = adjm)
sgenie3_early_wadj <- symmetrize(genie3_early_wadj, weight_function = "mean")
plotROC(sgenie3_early_wadj, adjm, plot_title = "ROC curve - GENIE3 Early Integration")

sgenie3_early_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 symmetrize output")

Generate Adjacency and Apply Cutoff

sgenie3_early_adj <- cutoff_adjacency(count_matrices = early_matrix,
                 weighted_adjm_list = sgenie3_early_wadj, 
                 ground.truth = adjm,
                 n = 2,
                 method = "GENIE3")
## Matrix 1 Mean 95th Percentile Cutoff: 0.01013741
sgenie3_early_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgenie3_early_adj)

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgenie3_early_adj)

ajm_compared <- compare_consensus(sgenie3_early_adj[[1]], adjm)

GRNBoost2

set.seed(1234)
tictoc::tic("GRNBoost2 early")
grnb_early <- infer_networks(early_matrix, method="GRNBoost2")
execution_times[['GRNBoost2 early']] <- tictoc::toc(log = TRUE)$toc[[1]]
## GRNBoost2 early: 11.525 sec elapsed
saveRDS(grnb_early, "./../analysis/grnb_early.RDS")

grnb_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 output")

symmetrize Output and ROC

grnb_early_wadj <- generate_adjacency(grnb_early, ground.truth = adjm)
sgrnb_early_wadj <- symmetrize(grnb_early_wadj, weight_function = "mean")
plotROC(sgrnb_early_wadj, adjm, plot_title = "ROC curve - GRNBoost2 Early Integration")

grnb_early_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 symmetrize output")

Generate Adjacency and Apply Cutoff

sgrnb_early_adj <- cutoff_adjacency(count_matrices = early_matrix,
                 weighted_adjm_list = sgrnb_early_wadj, 
                 ground.truth = adjm,
                 n = 2,
                 method = "GRNBoost2")
## Matrix 1 Mean 95th Percentile Cutoff: 4.243841
sgrnb_early_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgrnb_early_adj)

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgrnb_early_adj)

ajm_compared <- compare_consensus(sgrnb_early_adj[[1]], adjm)

ZILGM Park

set.seed(123)

tictoc::tic("ZILGM early")
lambda_max = find_lammax(as.matrix(early_matrix[[1]]))
lambda_min = 1e-4 * lambda_max
lambs = exp(seq(log(lambda_max), log(lambda_min), length.out = 50))

nb2_fit = zilgm(X = as.matrix(early_matrix[[1]]), lambda = lambs, family = "NBII", update_type = "IRLS", do_boot = TRUE,
                  boot_num = 10, sym = "OR", nCores = 15)
## learning for NBII graphical model 
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress :  10 % 
Conducting sampling in progress :  20 % 
Conducting sampling in progress :  30 % 
Conducting sampling in progress :  40 % 
Conducting sampling in progress :  50 % 
Conducting sampling in progress :  60 % 
Conducting sampling in progress :  70 % 
Conducting sampling in progress :  80 % 
Conducting sampling in progress :  90 % 
Conducting sampling in progress :  100 % 
est_graph = nb2_fit$network[[nb2_fit$opt_index]]
execution_times[['ZILGM early']] <- tictoc::toc(log = TRUE)$toc[[1]]
## ZILGM early: 3470.381 sec elapsed

Comparison with the Ground Truth

scores <- pscores(adjm, list(as.matrix(est_graph)))

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
ajm_compared <- compare_consensus(as.matrix(est_graph), adjm)

Joint Integration

Joint Random Forest

#https://cran.r-project.org/src/contrib/Archive/JRF/
#install.packages("/home/francescoc/Downloads/JRF_0.1-4.tar.gz", repos = NULL, type = "source")
#jrf_mat <- infer_networks(count_matrices, method="JRF")

jrf_matrices <- lapply(count_matrices, t)
jrf_matrices_norm <- lapply(jrf_matrices,function(x) {
  (x - mean(x)) / sd(x)
  })

genes <- rownames(jrf_matrices_norm[[1]])

set.seed(1234)
tictoc::tic("JRF_out")
netout <- JRF(X = jrf_matrices_norm, 
              genes.name = genes, 
              ntree = 500, 
              mtry = round(sqrt(length(genes) - 1)))
execution_times[['JRF_out']] <- tictoc::toc(log = TRUE)$toc[[1]]
## JRF_out: 1186.872 sec elapsed
netout %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF output")
out.perm <- Run_permutation(jrf_matrices_norm,mtry=round(sqrt(length(genes)-1)),ntree=500, genes,3)
#out <- JRF_permutation(jrf_matrices_norm,mtry=round(sqrt(length(genes)-1)),ntree=500,genes,2)

final.net <- JRF_network(netout,out.perm,0.001)
final.net
## [[1]]
## [[1]][[1]]
## [[1]][[1]][[1]]
##        gene1 gene2
## 341   EEF1B2 AHNAK
## 714     RPS5 APEX1
## 2992  FCGR3A  CD14
## 3225    CD3D   CD2
## 4106   CXCR4  CD3G
## 4609    GZMK   CD6
## 7756   KLRB1  GNLY
## 8438   RACK1 IGF1R
## 10659  PTPRC  PRF1
## 10796   TFRC PTPRC
## 
## 
## [[1]][[2]]
##          gene1    gene2
## 797        FGR ARHGAP25
## 1043     CASP8     BCL2
## 1702     PDE3B    CAMK4
## 3225      CD3D      CD2
## 3229       CD5      CD2
## 3267      GZMK      CD2
## 3277      IL7R      CD2
## 3852      CD3G     CD3D
## 3858      CD8A     CD3D
## 3892      GZMK     CD3D
## 3909     KLRD1     CD3D
## 4012      GZMB     CD3E
## 4179     PTPRC     CD3G
## 4838      GZMB     CD8A
## 4884     PTPRC     CD8A
## 5881      RPS8    EIF3E
## 6903     TIGIT    FCRL3
## 7459  TNFRSF18    FOXP3
## 8171     KLRB1     GZMK
## 8438     RACK1    IGF1R
## 8501      NKG7   IL10RA
## 10230   SPTBN1    NCAM1
## 10712 SLC9A3R1    PRKCA
## 10785     SELL    PTPRC
## 11411   TNFSF8  TNFRSF8
## 
## 
## [[2]]
## [1] gene1 gene2
## <0 rows> (or 0-length row.names)

Finalnet

myJRF_network <- function(out.jrf, out.perm, TH) {
  
  nclasses <- dim(out.perm)[3]
  M <- dim(out.perm)[2]
  out <- vector("list", nclasses)  # Initialize the list to store results for each class

  for (net in 1:nclasses) { 
    j.np <- sort(out.jrf[, 2 + net], decreasing = TRUE)
    FDR <- matrix(0, dim(out.perm)[1], 1)
    
    th <- NULL  # Initialize threshold variable
    for (s in 1:length(j.np)) { 
      FP <- sum(sum(out.perm[, , net] >= j.np[s])) / M
      FDR[s] <- FP / s
      
      if (FDR[s] > TH) {
        th <- j.np[s]
        break
      }
    }
    
    # Store the results for the current class in the `out` list
    out[[net]] <- out.jrf[out.jrf[, 2 + net] > th, seq(1, 2)]
  }
  
  return(out)
}

mynet <- myJRF_network(netout,out.perm,0.001)
mynet
## [[1]]
##        gene1 gene2
## 341   EEF1B2 AHNAK
## 714     RPS5 APEX1
## 2992  FCGR3A  CD14
## 3225    CD3D   CD2
## 4106   CXCR4  CD3G
## 4609    GZMK   CD6
## 7756   KLRB1  GNLY
## 8438   RACK1 IGF1R
## 10659  PTPRC  PRF1
## 10796   TFRC PTPRC
## 
## [[2]]
##          gene1    gene2
## 797        FGR ARHGAP25
## 1043     CASP8     BCL2
## 1702     PDE3B    CAMK4
## 3225      CD3D      CD2
## 3229       CD5      CD2
## 3267      GZMK      CD2
## 3277      IL7R      CD2
## 3852      CD3G     CD3D
## 3858      CD8A     CD3D
## 3892      GZMK     CD3D
## 3909     KLRD1     CD3D
## 4012      GZMB     CD3E
## 4179     PTPRC     CD3G
## 4838      GZMB     CD8A
## 4884     PTPRC     CD8A
## 5881      RPS8    EIF3E
## 6903     TIGIT    FCRL3
## 7459  TNFRSF18    FOXP3
## 8171     KLRB1     GZMK
## 8438     RACK1    IGF1R
## 8501      NKG7   IL10RA
## 10230   SPTBN1    NCAM1
## 10712 SLC9A3R1    PRKCA
## 10785     SELL    PTPRC
## 11411   TNFSF8  TNFRSF8
## 
## [[3]]
## [1] gene1 gene2
## <0 rows> (or 0-length row.names)
#https://cran.r-project.org/src/contrib/Archive/JRF/
#install.packages("/home/francescoc/Downloads/JRF_0.1-4.tar.gz", repos = NULL, type = "source")
set.seed(1234)
tictoc::tic("JRF")
jrf_mat <- infer_networks(count_matrices, method="JRF")
execution_times[['JRF']] <- tictoc::toc(log = TRUE)$toc[[1]]
## JRF: 1169.395 sec elapsed
jrf_mat[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF output")

Prepare the output

jrf_list <- list()

importance_columns <- grep("importance", names(jrf_mat[[1]]), value = TRUE)

for (i in seq_along(importance_columns)) {
  # Select the 'gene1', 'gene2', and the current 'importance' column
  df <- jrf_mat[[1]][, c("gene1", "gene2", importance_columns[i])]
  
  # Rename the importance column to its original name (e.g., importance1, importance2, etc.)
  names(df)[3] <- importance_columns[i]
  
  # Add the data frame to the output list
  jrf_list[[i]] <- df
}

saveRDS(jrf_list, "./../analysis/jrf.RDS")

jrf_list[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF output")

symmetrize Output and ROC

jrf_wadj <- generate_adjacency(jrf_list, ground.truth = adjm)
sjrf_wadj <- symmetrize(jrf_wadj, weight_function = "mean")
plotROC(sjrf_wadj, adjm, plot_title = "ROC curve - JRF Late Integration")

sjrf_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF symmetrize output")

Generate Adjacency and Apply Cutoff

sjrf_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sjrf_wadj, 
                 ground.truth = adjm,
                 n = 2,
                 method = "JRF")
## Matrix 1 Mean 95th Percentile Cutoff: 4.940559 
## Matrix 2 Mean 95th Percentile Cutoff: 4.907554 
## Matrix 3 Mean 95th Percentile Cutoff: 4.856606
sjrf_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sjrf_adj)

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sjrf_adj)

consesusm <- create_consensus(sjrf_adj, method="vote")
consesusu <- create_consensus(sjrf_adj, method="union")

scores <- pscores(adjm, list(consesusm))

scoresu <- pscores(adjm, list(consesusu))

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
scoresu$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
ajm_compared <- compare_consensus(consesusm, adjm)

ajm_compared <- compare_consensus(consesusu, adjm)

Time analysis

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=it_IT.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=it_IT.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=it_IT.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=it_IT.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] doRNG_1.8.2        rngtools_1.5.2     ZILGM_0.1.1        RColorBrewer_1.1-3
##  [5] rsvg_2.6.1         DiagrammeRsvg_0.1  JRF_0.1-4          pROC_1.18.0       
##  [9] DiagrammeR_1.0.11  gridExtra_2.3      reshape2_1.4.4     rbenchmark_1.0.0  
## [13] learn2count_0.3.2  reticulate_1.34.0  DT_0.22            forcats_0.5.1     
## [17] stringr_1.4.0      dplyr_1.0.9        purrr_0.3.4        readr_2.1.2       
## [21] tidyr_1.2.0        tibble_3.1.7       ggplot2_3.3.6      tidyverse_1.3.1   
## [25] igraph_2.0.3       doParallel_1.0.17  iterators_1.0.14   foreach_1.5.2     
## [29] GENIE3_1.16.0     
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-3            ellipsis_0.3.2             
##   [3] XVector_0.34.0              GenomicRanges_1.46.1       
##   [5] fs_1.5.2                    rstudioapi_0.13            
##   [7] farver_2.1.0                fansi_1.0.3                
##   [9] lubridate_1.8.0             xml2_1.3.3                 
##  [11] splines_4.1.0               codetools_0.2-18           
##  [13] bst_0.3-24                  pscl_1.5.9                 
##  [15] knitr_1.39                  flux_0.3-0.1               
##  [17] jsonlite_1.8.0              broom_0.8.0                
##  [19] dbplyr_2.1.1                png_0.1-7                  
##  [21] graph_1.72.0                compiler_4.1.0             
##  [23] httr_1.4.3                  tictoc_1.2.1               
##  [25] backports_1.4.1             assertthat_0.2.1           
##  [27] Matrix_1.6-1.1              fastmap_1.1.0              
##  [29] cli_3.3.0                   distributions3_0.2.2       
##  [31] visNetwork_2.1.2            htmltools_0.5.2            
##  [33] tools_4.1.0                 coda_0.19-4                
##  [35] gtable_0.3.0                glue_1.6.2                 
##  [37] GenomeInfoDbData_1.2.7      V8_6.0.0                   
##  [39] Rcpp_1.0.8.3                Biobase_2.54.0             
##  [41] statnet.common_4.10.0       cellranger_1.1.0           
##  [43] jquerylib_0.1.4             vctrs_0.4.1                
##  [45] crosstalk_1.2.0             xfun_0.30                  
##  [47] network_1.18.2              rvest_1.0.2                
##  [49] lifecycle_1.0.1             MASS_7.3-57                
##  [51] zlibbioc_1.40.0             scales_1.2.0               
##  [53] hms_1.1.1                   MatrixGenerics_1.6.0       
##  [55] SummarizedExperiment_1.24.0 SingleCellExperiment_1.16.0
##  [57] yaml_2.3.5                  curl_4.3.2                 
##  [59] sass_0.4.1                  rpart_4.1.16               
##  [61] stringi_1.7.6               highr_0.9                  
##  [63] S4Vectors_0.32.4            caTools_1.18.2             
##  [65] BiocGenerics_0.40.0         shape_1.4.6                
##  [67] GenomeInfoDb_1.30.1         rlang_1.1.4                
##  [69] pkgconfig_2.0.3             matrixStats_0.62.0         
##  [71] bitops_1.0-7                evaluate_0.15              
##  [73] lattice_0.20-45             labeling_0.4.2             
##  [75] htmlwidgets_1.5.4           tidyselect_1.1.2           
##  [77] gbm_2.2.2                   plyr_1.8.7                 
##  [79] magrittr_2.0.3              R6_2.5.1                   
##  [81] IRanges_2.28.0              generics_0.1.2             
##  [83] DelayedArray_0.20.0         DBI_1.1.2                  
##  [85] pillar_1.7.0                haven_2.5.0                
##  [87] withr_2.5.0                 survival_3.3-1             
##  [89] RCurl_1.98-1.6              modelr_0.1.8               
##  [91] crayon_1.5.1                utf8_1.2.2                 
##  [93] iZID_0.0.1                  tzdb_0.3.0                 
##  [95] rmarkdown_2.14              grid_4.1.0                 
##  [97] readxl_1.4.0                WeightSVM_1.7-16           
##  [99] reprex_2.0.1                digest_0.6.29              
## [101] numDeriv_2016.8-1.1         mpath_0.4-2.26             
## [103] glmnet_4.1-8                stats4_4.1.0               
## [105] munsell_0.5.0               bslib_0.3.1